{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Comparing different hierarchical linkage methods on toy datasets\n\n\nThis example shows characteristics of different linkage\nmethods for hierarchical clustering on datasets that are\n\"interesting\" but still in 2D.\n\nThe main observations to make are:\n\n- single linkage is fast, and can perform well on\n  non-globular data, but it performs poorly in the\n  presence of noise.\n- average and complete linkage perform well on\n  cleanly separated globular clusters, but have mixed\n  results otherwise.\n- Ward is the most effective method for noisy data.\n\nWhile these examples give some intuition about the\nalgorithms, this intuition might not apply to very high\ndimensional data.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\nimport time\nimport warnings\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import cluster, datasets\nfrom sklearn.preprocessing import StandardScaler\nfrom itertools import cycle, islice\n\nnp.random.seed(0)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Generate datasets. We choose the size big enough to see the scalability\nof the algorithms, but not too big to avoid too long running times\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "n_samples = 1500\nnoisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,\n                                      noise=.05)\nnoisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)\nblobs = datasets.make_blobs(n_samples=n_samples, random_state=8)\nno_structure = np.random.rand(n_samples, 2), None\n\n# Anisotropicly distributed data\nrandom_state = 170\nX, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)\ntransformation = [[0.6, -0.6], [-0.4, 0.8]]\nX_aniso = np.dot(X, transformation)\naniso = (X_aniso, y)\n\n# blobs with varied variances\nvaried = datasets.make_blobs(n_samples=n_samples,\n                             cluster_std=[1.0, 2.5, 0.5],\n                             random_state=random_state)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Run the clustering and plot\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Set up cluster parameters\nplt.figure(figsize=(9 * 1.3 + 2, 14.5))\nplt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,\n                    hspace=.01)\n\nplot_num = 1\n\ndefault_base = {'n_neighbors': 10,\n                'n_clusters': 3}\n\ndatasets = [\n    (noisy_circles, {'n_clusters': 2}),\n    (noisy_moons, {'n_clusters': 2}),\n    (varied, {'n_neighbors': 2}),\n    (aniso, {'n_neighbors': 2}),\n    (blobs, {}),\n    (no_structure, {})]\n\nfor i_dataset, (dataset, algo_params) in enumerate(datasets):\n    # update parameters with dataset-specific values\n    params = default_base.copy()\n    params.update(algo_params)\n\n    X, y = dataset\n\n    # normalize dataset for easier parameter selection\n    X = StandardScaler().fit_transform(X)\n\n    # ============\n    # Create cluster objects\n    # ============\n    ward = cluster.AgglomerativeClustering(\n        n_clusters=params['n_clusters'], linkage='ward')\n    complete = cluster.AgglomerativeClustering(\n        n_clusters=params['n_clusters'], linkage='complete')\n    average = cluster.AgglomerativeClustering(\n        n_clusters=params['n_clusters'], linkage='average')\n    single = cluster.AgglomerativeClustering(\n        n_clusters=params['n_clusters'], linkage='single')\n\n    clustering_algorithms = (\n        ('Single Linkage', single),\n        ('Average Linkage', average),\n        ('Complete Linkage', complete),\n        ('Ward Linkage', ward),\n    )\n\n    for name, algorithm in clustering_algorithms:\n        t0 = time.time()\n\n        # catch warnings related to kneighbors_graph\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\n                \"ignore\",\n                message=\"the number of connected components of the \" +\n                \"connectivity matrix is [0-9]{1,2}\" +\n                \" > 1. Completing it to avoid stopping the tree early.\",\n                category=UserWarning)\n            algorithm.fit(X)\n\n        t1 = time.time()\n        if hasattr(algorithm, 'labels_'):\n            y_pred = algorithm.labels_.astype(np.int)\n        else:\n            y_pred = algorithm.predict(X)\n\n        plt.subplot(len(datasets), len(clustering_algorithms), plot_num)\n        if i_dataset == 0:\n            plt.title(name, size=18)\n\n        colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',\n                                             '#f781bf', '#a65628', '#984ea3',\n                                             '#999999', '#e41a1c', '#dede00']),\n                                      int(max(y_pred) + 1))))\n        plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])\n\n        plt.xlim(-2.5, 2.5)\n        plt.ylim(-2.5, 2.5)\n        plt.xticks(())\n        plt.yticks(())\n        plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),\n                 transform=plt.gca().transAxes, size=15,\n                 horizontalalignment='right')\n        plot_num += 1\n\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}